This report summarizes the analysis, including descriptive statistics, ANOVA, Response Surface Modeling, and Cross-Validation. It also provides various visualizations to interpret the results effectively.
#' Install a package only when it cannot already be loaded.
#'
#' @param package Name of the package, as a character string.
#' @return Called for its side effect. Gives `NULL` invisibly when the
#'   package is already available; otherwise whatever `install.packages()`
#'   returns.
check_and_install <- function(package) {
  is_available <- requireNamespace(package, quietly = TRUE)
  if (is_available) {
    return(invisible(NULL))
  }
  install.packages(package, dependencies = TRUE)
}
# Packages this analysis depends on; any that are missing are installed
# before the library() calls that follow.
required_packages <- c(
  "dplyr", "ggplot2", "car", "rsm",
  "caret", "gridExtra", "plotly"
)
for (pkg in required_packages) {
  check_and_install(pkg)
}
library(dplyr)
library(ggplot2)
library(car)
library(rsm)
library(caret)
library(gridExtra)
library(plotly)
# Status message emitted after the library() calls above.
print("All required libraries are installed and loaded.")
## [1] "All required libraries are installed and loaded."
# Read the experimental dataset from a CSV in the user's Downloads folder.
data_path <- "~/Downloads/dataupdate.csv"
data <- read.csv(file = data_path)

# Show the first rows as a quick check of the import.
print("Dataset Preview:")
## [1] "Dataset Preview:"
head(x = data)
## Run Bacteria Cementation pH Zeolite comactive_effort consolidation
## 1 1 2.24e+09 0.75 6.5 10 2693.3 0.114
## 2 2 3.50e+09 1.00 3.5 6 2693.3 0.089
## 3 3 3.50e+09 0.50 3.5 10 1009.2 0.116
## 4 4 1.50e+09 1.00 3.5 6 1009.2 0.128
## 5 5 2.24e+09 0.75 6.5 10 593.3 0.075
## 6 6 3.50e+09 0.75 9.5 6 1009.2 0.092
## permeability shear_strength X.Erodibility calcite_form Ammonia_Conc
## 1 8.6e-06 18.92 39.03 3.85 3.86e-06
## 2 5.8e-06 16.70 35.82 2.00 3.04e-06
## 3 7.1e-06 22.14 24.56 7.03 1.39e-06
## 4 7.5e-06 31.21 16.59 5.65 3.86e-06
## 5 4.9e-06 17.79 45.13 5.43 4.68e-06
## 6 4.1e-06 6.71 34.77 8.75 2.22e-06
# Descriptive statistics for every numeric column, returned as a single wide
# row whose columns are named <column>_<statistic>.
descriptive_fns <- list(
  mean = mean,
  sd = sd,
  median = median,
  min = min,
  max = max
)
summary_stats <- summarise(data, across(where(is.numeric), descriptive_fns))
summary_stats
## Run_mean Run_sd Run_median Run_min Run_max Bacteria_mean Bacteria_sd
## 1 16 9.092121 16 1 31 2512903226 849918402
## Bacteria_median Bacteria_min Bacteria_max Cementation_mean Cementation_sd
## 1 2.24e+09 1.5e+09 3.5e+09 0.733871 0.2134585
## Cementation_median Cementation_min Cementation_max pH_mean pH_sd pH_median
## 1 0.75 0.5 1 6.5 2.569047 6.5
## pH_min pH_max Zeolite_mean Zeolite_sd Zeolite_median Zeolite_min Zeolite_max
## 1 3.5 9.5 6.387097 3.480205 6 2 10
## comactive_effort_mean comactive_effort_sd comactive_effort_median
## 1 1431.713 901.5663 1009.2
## comactive_effort_min comactive_effort_max consolidation_mean consolidation_sd
## 1 593.3 2693.3 0.1061935 0.04230557
## consolidation_median consolidation_min consolidation_max permeability_mean
## 1 0.114 0.035 0.188 8.787419e-06
## permeability_sd permeability_median permeability_min permeability_max
## 1 5.737605e-06 7.5e-06 1.21e-06 2.91e-05
## shear_strength_mean shear_strength_sd shear_strength_median
## 1 16.91194 8.166283 17.02
## shear_strength_min shear_strength_max X.Erodibility_mean X.Erodibility_sd
## 1 6.11 39.7 29.82323 14.93195
## X.Erodibility_median X.Erodibility_min X.Erodibility_max calcite_form_mean
## 1 25.04 5.49 67.62 4.675161
## calcite_form_sd calcite_form_median calcite_form_min calcite_form_max
## 1 2.043531 4.18 1.78 8.75
## Ammonia_Conc_mean Ammonia_Conc_sd Ammonia_Conc_median Ammonia_Conc_min
## 1 2.668226e-06 1.120157e-06 2.22e-06 5.75e-07
## Ammonia_Conc_max
## 1 5.51e-06
# Main-effects ANOVA of consolidation on the five experimental factors.
anova_model <- aov(
  consolidation ~ Bacteria + Cementation + pH + Zeolite + comactive_effort,
  data = data
)
anova_summary <- summary(anova_model)
print(anova_summary)
## Df Sum Sq Mean Sq F value Pr(>F)
## Bacteria 1 0.00047 0.000471 0.247 0.623
## Cementation 1 0.00326 0.003256 1.712 0.203
## pH 1 0.00147 0.001467 0.771 0.388
## Zeolite 1 0.00082 0.000820 0.431 0.517
## comactive_effort 1 0.00013 0.000134 0.070 0.793
## Residuals 25 0.04755 0.001902
# First-order (FO) response-surface model of consolidation on the five factors.
rsm_model <- rsm(
  consolidation ~ FO(Bacteria, Cementation, pH, Zeolite, comactive_effort),
  data = data
)
rsm_summary <- summary(rsm_model)
print(rsm_summary)
##
## Call:
## rsm(formula = consolidation ~ FO(Bacteria, Cementation, pH, Zeolite,
## comactive_effort), data = data)
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.1218e-01 4.4903e-02 2.4982 0.01942 *
## Bacteria -4.9114e-12 9.6152e-12 -0.5108 0.61397
## Cementation 4.8909e-02 3.7783e-02 1.2945 0.20733
## pH -2.5182e-03 3.1594e-03 -0.7971 0.43292
## Zeolite -1.5213e-03 2.3091e-03 -0.6588 0.51602
## comactive_effort -2.4105e-06 9.0923e-06 -0.2651 0.79310
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Multiple R-squared: 0.1145, Adjusted R-squared: -0.06262
## F-statistic: 0.6464 on 5 and 25 DF, p-value: 0.6667
##
## Analysis of Variance Table
##
## Response: consolidation
## Df Sum Sq Mean Sq
## FO(Bacteria, Cementation, pH, Zeolite, comactive_effort) 5 0.006147 0.0012294
## Residuals 25 0.047546 0.0019018
## Lack of fit 20 0.042099 0.0021049
## Pure error 5 0.005447 0.0010894
## F value Pr(>F)
## FO(Bacteria, Cementation, pH, Zeolite, comactive_effort) 0.6464 0.6667
## Residuals
## Lack of fit 1.9322 0.2400
## Pure error
##
## Direction of steepest ascent (at radius 1):
## Bacteria Cementation pH Zeolite
## -1.002375e-10 9.981957e-01 -5.139420e-02 -3.104930e-02
## comactive_effort
## -4.919534e-05
##
## Corresponding increment in original units:
## Bacteria Cementation pH Zeolite
## -1.002375e-10 9.981957e-01 -5.139420e-02 -3.104930e-02
## comactive_effort
## -4.919534e-05
# Fix the RNG seed: caret assigns observations to folds at random, so without
# a seed the held-out predictions (and the plot below) change on every run.
set.seed(123)

# 10-fold cross-validation of the linear model, keeping the held-out
# predictions of the final model.
control <- trainControl(method = "cv", number = 10, savePredictions = "final")
cv_model <- train(
  consolidation ~ Bacteria + Cementation + pH + Zeolite + comactive_effort,
  data = data,
  method = "lm",
  trControl = control
)

# Held-out predictions; the plot uses the `obs` and `pred` columns.
cv_predictions <- cv_model$pred

# Observed vs predicted values; the red line is a linear fit through the
# plotted points.
ggplot(cv_predictions, aes(x = obs, y = pred)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  labs(
    title = "Cross-Validation: Observed vs Predicted",
    x = "Observed Values",
    y = "Predicted Values"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
# Pairwise scatterplots of all numeric columns.
numeric_data <- select(data, where(is.numeric))
pairs(numeric_data, main = "Scatterplot Matrix")
# Boxplot of consolidation, one box per distinct Bacteria value.
boxplot_labels <- labs(
  title = "Boxplot of Consolidation by Bacteria",
  x = "Bacteria",
  y = "Consolidation"
)
boxplot_plot <- ggplot(
  data = data,
  mapping = aes(x = as.factor(Bacteria), y = consolidation)
) +
  geom_boxplot() +
  boxplot_labels +
  theme_minimal()
boxplot_plot
# Contour plot of the fitted response surface over Bacteria and Cementation.
contour(
  rsm_model,
  ~ Bacteria + Cementation,
  main = "Response Surface: Bacteria and Cementation"
)

# Consolidation against Bacteria, with one trace per Cementation level.
interaction.plot(
  x.factor = data$Bacteria,
  trace.factor = data$Cementation,
  response = data$consolidation,
  main = "Interaction Plot: Bacteria and Cementation",
  xlab = "Bacteria",
  ylab = "Consolidation"
)
# Two-column panel: the consolidation boxplot beside the cross-validation
# observed-vs-predicted scatter.
cv_scatter <- ggplot(cv_predictions, aes(x = obs, y = pred)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  labs(
    title = "Cross-Validation: Observed vs Predicted",
    x = "Observed Values",
    y = "Predicted Values"
  ) +
  theme_minimal()
grid.arrange(boxplot_plot, cv_scatter, ncol = 2)
## `geom_smooth()` using formula = 'y ~ x'
# Interactive 3D scatter: Bacteria (x), Cementation (y), consolidation (z),
# with markers coloured by the consolidation value.
marker_style <- list(
  size = 5,
  color = ~consolidation,
  colorscale = "Viridis",
  showscale = TRUE
)
scene_axes <- list(
  xaxis = list(title = "Bacteria"),
  yaxis = list(title = "Cementation"),
  zaxis = list(title = "Consolidation")
)
fig <- plot_ly(
  data = data,
  x = ~Bacteria,
  y = ~Cementation,
  z = ~consolidation,
  type = "scatter3d",
  mode = "markers",
  marker = marker_style
)
fig <- layout(
  fig,
  title = "3D Visualization: Bacteria, Cementation, and Consolidation",
  scene = scene_axes
)
fig